# functions to compare professionals and non-professionals

# Round `value` to `dgts` decimals and return it as text that always shows
# exactly `dgts` decimal places (e.g. 2 -> "2.000").
# For vector input, format() pads all elements to a common width.
roundr <- function(value, dgts = 3) {
  rounded <- round(value, digits = dgts)
  format(rounded, nsmall = dgts)
}

# P-value of a survey-weighted t-test of `var_outcome` by `var_prof`.
#
# Arguments:
#   var_outcome  name of the outcome column in `dt`.
#   dt           data frame holding outcome, group and weight columns.
#   var_prof     name of the grouping column.
#   weight_var   name of the column with the sampling weights.
#
# Returns the `p.value` element of the svyttest() result.
ttest_p <- function(var_outcome, dt, var_prof, weight_var) {
  # "<outcome> ~ <group>", built from the column names
  test_formula <- as.formula(sprintf("%s ~ %s", var_outcome, var_prof))

  # design without clusters (ids = ~1), weighted by the weight column
  svy <- svydesign(ids = ~1, weights = ~ dt[[weight_var]], data = dt)

  svyttest(test_formula, svy)$p.value
}

# P-value of a survey-weighted chi-squared test of independence between
# `var_outcome` and `var_prof`.
#
# Arguments:
#   var_outcome  name of the (categorical) outcome column in `dt`.
#   dt           data frame holding outcome, group and weight columns.
#   var_prof     name of the grouping column.
#   weight_var   name of the column with the sampling weights.
#
# Returns the `p.value` element of the svychisq() result, or NA when the
# cross-tabulation has fewer than two rows or fewer than two columns, i.e.
# when there is nothing to compare.
chisq_p <- function(var_outcome, dt, var_prof, weight_var) {
  # `[[` always yields a plain vector, also for data frames whose `[, col]`
  # does not drop to a vector.
  crosstab <- table(dt[[var_outcome]], dt[[var_prof]])

  # A test needs at least two outcome categories AND two groups.
  if (any(dim(crosstab) < 2)) {
    return(NA_real_)
  }

  design <- svydesign(ids = ~1, weights = ~ dt[[weight_var]], data = dt)
  result <- svychisq(as.formula(paste("~", var_outcome, "+", var_prof)), design)

  result$p.value
}

# P-value of a two-sample Kolmogorov-Smirnov test comparing the
# weight-multiplied outcome of professionals (`var_prof` == 1) with that of
# non-professionals (`var_prof` == 0).
#
# Arguments:
#   var_outcome  name of the numeric outcome column in `dt`.
#   dt           data frame holding outcome, group and weight columns.
#   var_prof     name of the grouping column, coded 1 / 0.
#   weight_var   name of the column with the sampling weights.
#
# Returns the `p.value` element of the ks.test() result, or NA when one of the
# two groups has no non-missing values.
#
# NOTE(review): the test is run on weight * outcome, which is not the same as
# a weighted KS test on the outcome itself -- confirm this is intended.
ks_p <- function(var_outcome, dt, var_prof, weight_var) {
  design <- svydesign(ids = ~1, weights = dt[[weight_var]], data = dt)
  weighted_value <- weights(design) * dt[[var_outcome]]

  # which() drops rows with a missing group; `[[` keeps the extraction a
  # plain vector regardless of the data frame flavour.
  group <- dt[[var_prof]]
  values_pro <- weighted_value[which(group == 1)]
  values_nonpro <- weighted_value[which(group == 0)]

  # ks.test() aborts on an empty sample; report "no result" instead.
  if (all(is.na(values_pro)) || all(is.na(values_nonpro))) {
    return(NA_real_)
  }

  ks.test(values_pro, values_nonpro)$p.value
}

# Build a table comparing professionals and non-professionals on a set of
# outcome variables.
#
# For every outcome the table holds the survey-weighted estimate and its
# standard error per group, formatted as "estimate (se)", the p-value of a
# group-difference test and a significance marker.
#
# Arguments:
#   dt                 data frame with one row per respondent.
#   var_prof           name of the group column; rows where it equals 1 are
#                      labelled "pro", all other values "nonpro".
#   outcomes_prop      outcomes summarised as weighted means, shown multiplied
#                      by 100 with 1 decimal and tested with chisq_p().
#   outcomes_median    outcomes summarised as weighted medians, shown with
#                      0 decimals and tested with ks_p().
#   outcomes_mean_raw  outcomes summarised as weighted means, shown with
#                      1 decimal and tested with ttest_p().
#   outcomes_mean_01   like outcomes_mean_raw, but shown with 2 decimals.
#   order              character vector of outcome names giving the row order;
#                      outcomes missing from it end up last with an NA label.
#   weight_var         name of the weight column.
#   output             "html" marks significance with asterisks / a circle,
#                      any other value with LaTeX commands.
#
# Returns a data frame with the columns variable, pro, nonpro, p and sig.
# In `variable` the suffixes "_w<digit>" and "_num" are removed.
make_table <- function(
    dt, var_prof, outcomes_prop, outcomes_median, outcomes_mean_raw, outcomes_mean_01,
    order, weight_var, output = "html") {

  # Turn a grouped survey summary (one row per group; an estimate column
  # ending in "_mean" plus a standard-error column per outcome) into one row
  # per outcome with the columns mean_pro, mean_nonpro, se_pro and se_nonpro.
  # Outcomes listed in `percent_vars` are multiplied by 100.
  reshape_summary <- function(summ, percent_vars = NULL) {
    summ %>%
      pivot_longer(
        cols = -all_of(var_prof),
        names_to = "variable",
        values_to = "values"
      ) %>%
      mutate(
        parameter = ifelse(str_ends(variable, "mean"), "mean", "se"),
        # Strip only the trailing statistic suffix, so that outcome names
        # which themselves contain "_se" or "_mean" stay intact.
        variable = str_remove(variable, "(_mean)?(_se)?$")
      ) %>%
      pivot_wider(names_from = parameter, values_from = values) %>%
      mutate(professional = ifelse(.data[[var_prof]] == 1, "pro", "nonpro")) %>%
      select(-all_of(var_prof)) %>%
      mutate(
        mean = ifelse(variable %in% percent_vars, mean * 100, mean),
        se = ifelse(variable %in% percent_vars, se * 100, se)
      ) %>%
      pivot_wider(names_from = "professional", values_from = c("mean", "se"))
  }

  # Run one of the *_p() helpers for every outcome in `vars`; the result is a
  # list named by outcome (empty when `vars` is NULL).
  run_tests <- function(vars, test_fun) {
    stats::setNames(
      lapply(vars, test_fun, dt = dt, var_prof = var_prof, weight_var = weight_var),
      vars
    )
  }

  # Convert data to survey design
  survey_design <- dt %>%
    as_survey_design(weights = !!sym(weight_var))

  # Compute weighted means and SEs
  mean_vars <- c(outcomes_prop, outcomes_mean_01, outcomes_mean_raw)
  if (length(mean_vars) > 0) {
    summ_mean <- survey_design %>%
      group_by(!!sym(var_prof)) %>%
      summarize(across(
        any_of(mean_vars),
        list(mean = ~ survey_mean(., vartype = "se", na.rm = TRUE)),
        .names = "{.col}_{.fn}"
      )) %>%
      # proportions are reported as percentages
      reshape_summary(percent_vars = outcomes_prop)
  } else {
    summ_mean <- data.frame()
  }

  # Compute weighted medians and SEs. The statistic is labelled "mean" on
  # purpose so that both summaries share column names and can be stacked.
  if (length(outcomes_median) > 0) {
    summ_median <- survey_design %>%
      group_by(!!sym(var_prof)) %>%
      summarize(across(
        any_of(outcomes_median),
        list(mean = ~ survey_median(., vartype = "se", na.rm = TRUE)),
        .names = "{.col}_{.fn}"
      )) %>%
      reshape_summary()
  } else {
    summ_median <- data.frame()
  }

  # calculate p-values, keyed by the original outcome names
  p_list <- c(
    run_tests(outcomes_prop, chisq_p),
    run_tests(c(outcomes_mean_raw, outcomes_mean_01), ttest_p),
    run_tests(outcomes_median, ks_p)
  )
  pvalues <- data.frame(
    variable = as.character(names(p_list)),
    # the test helpers may return named values; keep the bare number
    p = unname(vapply(p_list, function(x) as.numeric(x)[[1]], numeric(1))),
    stringsAsFactors = FALSE
  )

  tab <- bind_rows(summ_mean, summ_median) %>%
    as.data.frame() %>%
    # round depending on variable type
    mutate(across(where(is.numeric), ~ case_when(
      variable %in% outcomes_median ~ roundr(., 0),
      variable %in% outcomes_prop ~ roundr(., 1),
      variable %in% outcomes_mean_raw ~ roundr(., 1),
      variable %in% outcomes_mean_01 ~ roundr(., 2)
    ))) %>%
    mutate(
      pro = trimws(paste0(mean_pro, " (", trimws(se_pro), ")")),
      nonpro = trimws(paste0(mean_nonpro, " (", trimws(se_nonpro), ")"))
    ) %>%
    mutate(variable = factor(variable, ordered = TRUE, levels = order)) %>%
    arrange(variable) %>%
    mutate(variable = as.character(variable)) %>%
    select(variable, pro, nonpro) %>%
    # Join on the original outcome names first; the display suffixes are
    # removed only afterwards, otherwise suffixed outcomes would not find
    # their p-value.
    left_join(pvalues, by = "variable") %>%
    mutate(variable = gsub("_w[0-9]|_num", "", variable))

  # significance markers, from strongest to weakest
  sig_marks <- if (output == "html") {
    c("***", "**", "*", "○")
  } else {
    c("\\star\\star\\star", "\\star\\star", "\\star", "\\circ")
  }

  tab %>%
    mutate(sig = case_when(
      p < 0.001 ~ sig_marks[1],
      p < 0.01 ~ sig_marks[2],
      p < 0.05 ~ sig_marks[3],
      p < 0.10 ~ sig_marks[4]
    ))
}

# Render the sociodemographic / political comparison table and save it.
#
# Arguments:
#   dt       table to render; eleven column headers are supplied below.
#   caption  table caption.
#   format   output format passed to kable().
#   file     path handed to save_kable().
#   label    optional table label passed to kable().
#
# Rows 1-4 are grouped as "Sociodemographics" and rows 5-10 as
# "Political outcomes". Returns whatever save_kable() returns.
export_kable_sociodem <- function(dt, caption, format, file, label = NULL) {
  note <- paste0(
    "Standard errors of means in parentheses. ",
    "Significance of differences between professionals and non-professionals ",
    "were tested with a Kolmogorov-Smirnov test for age, chi-squared tests ",
    "for gender, education and race, and t-tests for all other variables ",
    "(\\\\circ p < 0.1; \\\\star p < 0.05; \\\\star\\\\star p < 0.01; ",
    "\\\\star\\\\star\\\\star p < 0.001). ",
    "Sociodemographic population data from the US Census; political ",
    "variables from ANES 2020. Variables trust, political interest, ",
    "knowledge and partisanship were recoded to a scale from 0 to 1 to ",
    "ensure comparability."
  )

  kable(
    dt, caption = caption,
    format = format, booktabs = TRUE, escape = FALSE, linesep = "",
    row.names = FALSE, label = label,
    col.names = c("", "U.S. population", "Professionals", "", "Non-professionals",
                  "Professionals", "", "Non-professionals",
                  "Professionals", "", "Non-professionals")) %>%
    kable_styling(full_width = FALSE, latex_options = c("HOLD_position", "scale_down")) %>%
    add_header_above(c(" " = 2, "Facebook" = 3, "Lucid" = 3, "Yougov" = 3)) %>%
    pack_rows("Sociodemographics", 1, 4) %>%
    pack_rows("Political outcomes", 5, 10) %>%
    column_spec(1, width = "7cm") %>%
    footnote(general = note, threeparttable = TRUE) %>%
    save_kable(file = file)
}

# Render the response-quality comparison table and save it.
#
# Arguments:
#   dt       table to render; ten column headers are supplied below.
#   caption  table caption.
#   format   output format passed to kable().
#   file     path handed to save_kable().
#   label    optional table label passed to kable().
#
# Returns whatever save_kable() returns.
export_kable_quality <- function(dt, caption, format, file, label = NULL) {
  note <- paste0(
    "Standard errors in parentheses. ",
    "Significance of differences between professionals and non-professionals ",
    "were tested with a Kolmogorov-Smirnov test for survey duration, and ",
    "chi-squared tests for the proportion of those 30/40/50 percent faster ",
    "than the median duration and for proportion of straightliners ",
    "($\\circ$ p < 0.1; $\\star$ p < 0.05; $\\star\\star$ p < 0.01; ",
    "$\\star\\star\\star$ p < 0.001)."
  )

  kable(
    dt, caption = caption,
    format = format, booktabs = TRUE, escape = FALSE, linesep = "",
    row.names = FALSE, label = label,
    col.names = c("", "Professionals", "", "Non-professionals",
                  "Professionals", "", "Non-professionals",
                  "Professionals", "", "Non-professionals")) %>%
    kable_styling(full_width = FALSE, latex_options = c("HOLD_position", "scale_down")) %>%
    add_header_above(c(" " = 1, "Facebook" = 3, "Lucid" = 3, "Yougov" = 3)) %>%
    column_spec(1, width = "7cm") %>%
    footnote(general = note, threeparttable = TRUE) %>%
    save_kable(file = file)
}

# Render a four-column table (label column plus Facebook, Lucid, Yougov) with
# a standard-error footnote and save it to `file`.
#
# Arguments:
#   dt       table to render.
#   caption  table caption.
#   format   output format passed to kable().
#   file     path handed to save_kable().
#   label    optional table label passed to kable().
export_kable_repeated <- function(dt, caption, format, file, label = NULL) {
  headers <- c("", "Facebook", "Lucid", "Yougov")

  tbl <- kable(
    dt,
    caption = caption, format = format, label = label,
    booktabs = TRUE, escape = FALSE, row.names = FALSE,
    linesep = "\\addlinespace",
    col.names = headers
  )
  tbl <- kable_styling(
    tbl,
    full_width = FALSE,
    latex_options = c("HOLD_position", "scale_down")
  )
  tbl <- footnote(
    tbl,
    general = "Standard errors in parentheses.",
    threeparttable = TRUE
  )

  save_kable(tbl, file = file)
}

# Render a table with a Professionals / Non-professionals column pair for each
# of Facebook, Lucid and Yougov, add a standard-error footnote and save it to
# `file`.
#
# Arguments:
#   dt       table to render; seven column headers are supplied below.
#   caption  table caption.
#   format   output format passed to kable().
#   file     path handed to save_kable().
#   label    optional table label passed to kable().
export_kable_repeated_prof <- function(dt, caption, format, file, label = NULL) {
  group_pair <- c("Professionals", "Non-professionals")
  headers <- c("", group_pair, group_pair, group_pair)

  tbl <- kable(
    dt,
    caption = caption, format = format, label = label,
    booktabs = TRUE, escape = FALSE,
    linesep = "\\addlinespace",
    col.names = headers
  )
  # spanning header first, styling second
  tbl <- add_header_above(
    tbl,
    c(" " = 1, "Facebook" = 2, "Lucid" = 2, "Yougov" = 2)
  )
  tbl <- kable_styling(
    tbl,
    full_width = FALSE,
    latex_options = c("HOLD_position", "scale_down")
  )
  tbl <- footnote(
    tbl,
    general = "Standard errors in parentheses.",
    threeparttable = TRUE
  )

  save_kable(tbl, file = file)
}

# Render a table with caller-supplied column headers under a Facebook / Lucid /
# Yougov spanning header (two columns each) and save it to `file`.
#
# Arguments:
#   dt        table to render.
#   caption   table caption.
#   format    output format passed to kable().
#   file      path handed to save_kable().
#   colnames  column headers passed to kable() as `col.names`.
#   label     optional table label passed to kable().
export_kable_repeated_groups <- function(dt, caption, format, file,
                                         colnames, label = NULL) {
  tbl <- kable(
    dt,
    caption = caption, format = format, label = label,
    booktabs = TRUE, escape = FALSE, row.names = FALSE,
    linesep = "\\addlinespace",
    col.names = colnames
  )
  tbl <- add_header_above(
    tbl,
    c(" " = 1, "Facebook" = 2, "Lucid" = 2, "Yougov" = 2)
  )
  tbl <- kable_styling(
    tbl,
    full_width = FALSE,
    latex_options = c("HOLD_position", "scale_down")
  )

  save_kable(tbl, file = file)
}


